library(tidyverse)
library(dplyr)
# install.packages("skimr")
library(skimr)
# install.packages("janitor")
library(janitor)
# install.packages("ggcorrplot")
library(ggcorrplot)
# load clean file
# The path is relative, so it is resolved against the current working
# directory. Check it up front: a miss otherwise surfaces only as the opaque
# "cannot open the connection" error raised inside read.csv().
clean_path <- "../data/clean/life_clean.csv"
if (!file.exists(clean_path)) {
  stop(
    "Clean data file not found: ", clean_path,
    " (working directory: ", getwd(), ")",
    call. = FALSE
  )
}
life_clean <- read.csv(clean_path)
Error in file(file, "rt") : cannot open the connection
# First candidate for the y variable: calories_burned
# (total calories burned during the session).
# Print its min / quartiles / median / mean / max.
life_clean |>
  pull(calories_burned) |>
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
323.1 910.8 1231.5 1280.1 1553.1 2890.8
# Histogram of calories_burned using 30 bins.
life_clean |>
  ggplot(aes(calories_burned)) +
  geom_histogram(bins = 30, fill = "lightblue4", color = "purple")

# The distribution is right-skewed, with a mean of about 1280 total calories
# burned.
# Horizontal boxplot of calories_burned to look for outliers.
life_clean |>
  ggplot(aes(calories_burned)) +
  geom_boxplot(fill = "steelblue")

# The boxplot shows some outliers at the high end of calories burned.
# Second candidate for the y variable: burns_calories_per_30min
# (estimated calories burned in 30 min of "that" exercise).
# Print its min / quartiles / median / mean / max.
life_clean |>
  pull(burns_calories_per_30min) |>
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
128.8 337.6 348.6 344.0 360.5 381.7
# Histogram of burns_calories_per_30min using 30 bins.
life_clean |>
  ggplot(aes(burns_calories_per_30min)) +
  geom_histogram(bins = 30, fill = "lightblue4", color = "purple")

# The distribution is left-skewed, with a mean of about 344 calories burned
# in a 30-minute session.
# Horizontal boxplot of burns_calories_per_30min to look for outliers.
life_clean |>
  ggplot(aes(burns_calories_per_30min)) +
  geom_boxplot(fill = "steelblue")

# The boxplot shows outliers spread evenly along the low end of the
# calorie-burn range.
Explore Diet Variables
Using calories_burned as the dependent variable
# Scatter plots of calories_burned against each numeric diet variable, each
# overlaid with a linear-model ("lm") trend line.

# carbs
life_clean |>
  ggplot(aes(carbs, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# proteins
life_clean |>
  ggplot(aes(proteins, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# fats
life_clean |>
  ggplot(aes(fats, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# calories
life_clean |>
  ggplot(aes(calories, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# sugar_g
life_clean |>
  ggplot(aes(sugar_g, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# Boxplot of calories_burned by diet_type.
life_clean |>
  ggplot(aes(diet_type, calories_burned)) +
  geom_boxplot()

# calories_burned by diet_type, shown as individual observations.
# NOTE(review): assumes diet_type is categorical (character/factor) - confirm.
# A linear smooth over a categorical x is not meaningful: within one category
# there is no slope to fit. Jitter the points sideways so they do not pile up
# in a single vertical strip, and mark each group's mean (red) for comparison.
ggplot(life_clean, aes(x = diet_type, y = calories_burned)) +
  geom_jitter(width = 0.2, height = 0, alpha = 0.4) +
  stat_summary(fun = mean, geom = "point", color = "red", size = 3)

# calories_burned against burns_calories_bin, with a linear-model trend line.
# NOTE(review): if burns_calories_bin holds categorical bin labels rather than
# numbers, the lm trend line is not meaningful here - confirm the column type.
life_clean |>
  ggplot(aes(burns_calories_bin, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# Column means, one mean() call per column. Each `[1] ...` line is the
# rendered console output of the call directly above it.
# mean() is called without na.rm = TRUE, so a missing value in a column would
# make that column's mean NA.
mean(life_clean$carbs)
[1] 249.7786
mean(life_clean$proteins)
[1] 99.91829
mean(life_clean$fats)
[1] 66.61217
mean(life_clean$calories)
[1] 2024.418
mean(life_clean$sugar_g)
[1] 24.53104
mean(life_clean$cholesterol_mg)
[1] 148.4656
mean(life_clean$cal_balance)
[1] 744.3087
Explore Exercise Variables
Using calories_burned as the dependent variable
# Scatter plots of calories_burned against heart-rate and session-length
# variables, each overlaid with a linear-model ("lm") trend line.

# max_bpm
life_clean |>
  ggplot(aes(max_bpm, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# resting_bpm
life_clean |>
  ggplot(aes(resting_bpm, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# session_duration_hours
life_clean |>
  ggplot(aes(session_duration_hours, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# calories_burned by workout_type, shown as individual observations.
# NOTE(review): assumes workout_type is categorical (character/factor) -
# confirm.
# A linear smooth over a categorical x is not meaningful: within one category
# there is no slope to fit. Jitter the points sideways so they do not pile up
# in a single vertical strip, and mark each group's mean (red) for comparison.
ggplot(life_clean, aes(x = workout_type, y = calories_burned)) +
  geom_jitter(width = 0.2, height = 0, alpha = 0.4) +
  stat_summary(fun = mean, geom = "point", color = "red", size = 3)

# Scatter plots of calories_burned against workout frequency and volume
# variables, each overlaid with a linear-model ("lm") trend line.

# workout_frequency_days_week
life_clean |>
  ggplot(aes(workout_frequency_days_week, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# sets
life_clean |>
  ggplot(aes(sets, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# reps
life_clean |>
  ggplot(aes(reps, calories_burned)) +
  geom_point() +
  geom_smooth(method = "lm")

# Boxplot of calories_burned by workout_type.
life_clean |>
  ggplot(aes(workout_type, calories_burned)) +
  geom_boxplot()

# Keep only the numeric columns of life_clean.
num_vars <- select(life_clean, where(is.numeric))

# Pairwise correlations, computed on rows with no missing values.
corr <- num_vars |>
  cor(use = "complete.obs")

# Correlation heatmap: variables ordered by hierarchical clustering,
# no coefficient labels, small axis text rotated 45 degrees.
ggcorrplot(
  corr,
  hc.order = TRUE,
  lab = FALSE,
  tl.cex = 6,
  tl.srt = 45
)

LS0tCnRpdGxlOiAiRGF0YSBFeHBsb3JhdGlvbiAtIENhbG9yaWVzIFByb2plY3QiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShkcGx5cikKIyBpbnN0YWxsLnBhY2thZ2VzKCJza2ltciIpCmxpYnJhcnkoc2tpbXIpCiMgaW5zdGFsbC5wYWNrYWdlcygiamFuaXRvciIpCmxpYnJhcnkoamFuaXRvcikKIyBpbnN0YWxsLnBhY2thZ2VzKCJnZ2NvcnJwbG90IikKbGlicmFyeShnZ2NvcnJwbG90KQpgYGAKCmBgYHtyfQojIGxvYWQgY2xlYW4gZmlsZQpsaWZlX2NsZWFuIDwtIHJlYWQuY3N2KCIuLi9kYXRhL2NsZWFuL2xpZmVfY2xlYW4uY3N2IikKCmdsaW1wc2UobGlmZV9jbGVhbikKc2tpbXI6OnNraW0obGlmZV9jbGVhbikKYGBgCgpgYGB7cn0KIyBsb29raW5nIGF0IGNhbG9yaWVzX2J1cm5lZCBhcyB0aGUgeSB2YXJpYWJsZQojIGNhbG9yaWVzX2J1cm5lZDogdG90YWwgY2Fsb3JpZXMgYnVybmVkIGR1cmluZyB0aGUgc2Vzc2lvbgoKc3VtbWFyeShsaWZlX2NsZWFuJGNhbG9yaWVzX2J1cm5lZCkKCmdncGxvdChsaWZlX2NsZWFuLCBhZXMoeCA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gMzAsIGNvbG9yID0gInB1cnBsZSIsIGZpbGwgPSAibGlnaHRibHVlNCIpCgojIGxvb2tpbmcgYXQgYSByaWdodCBza2V3ZWQgZGlzdHJpYnV0aW9uIHdpdGggYSBtZWFuIGFib3V0IDEyODAgdG90YWwgY2Fsb3JpZXMgYnVybmVkCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSBjYWxvcmllc19idXJuZWQpKSArCiAgZ2VvbV9ib3hwbG90KGZpbGwgPSAic3RlZWxibHVlIikKCiMgYm94cGxvdCBzaG93cyBzb21lIG91dGxpZXJzIG9uIHRoZSBoaWdoZXIgY2Fsb3JpZSBidXJuIGVuZAoKYGBgCmBgYHtyfQojIGxvb2tpbmcgYXQgYnVybnNfY2Fsb3JpZXNfcGVyXzMwbWluIGFzIHRoZSB5IHZhcmlhYmxlCiMgYnVybnNfY2Fsb3JpZXNfcGVyXzMwbWluOiBlc3RpbWF0ZWQgY2Fsb3JpZXMgYnVybmVkIGluIDMwIG1pbiBvZiAidGhhdCIgZXhlcmNpc2UKCnN1bW1hcnkobGlmZV9jbGVhbiRidXJuc19jYWxvcmllc19wZXJfMzBtaW4pCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSBidXJuc19jYWxvcmllc19wZXJfMzBtaW4pKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDMwLCBjb2xvciA9ICJwdXJwbGUiLCBmaWxsID0gImxpZ2h0Ymx1ZTQiKQoKIyBsb29raW5nIGF0IGEgbGVmdCBza2V3ZWQgZGlzdHJpYnV0aW9uIHdpdGggYSBtZWFuIGFib3V0IDM0NCBjYWxvcmllcyBidXJuZWQgaW4gYSAzMG1pbiBzZXNzaW9uCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSBidXJuc19jYWxvcmllc19wZXJfMzBtaW4pKSArCiAgZ2VvbV9ib3hwbG90KGZpbGwgPSAic3RlZWxibHVlIikKCiMgYm94cGxvdCBzaG93cyBhbiBldmVuIGRpc3RyaWJ1dGlvbiBvZiBvdXRsaWVycyBvbiB0aGUgbG93ZXIgY2Fsb3JpZSBidXJuIAoKYGBgCgojIEV4cGxvcmUgRGll
dCBWYXJpYWJsZXMKVXNpbmcgY2Fsb3JpZXNfYnVybmVkIGZvciB0aGUgZGVwZW5kZW50IHZhcmlhYmxlCgpgYGB7cn0KZ2dwbG90KGxpZmVfY2xlYW4sIGFlcyh4ID0gY2FyYnMsIHkgPSBjYWxvcmllc19idXJuZWQpKSArCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iKQoKZ2dwbG90KGxpZmVfY2xlYW4sIGFlcyh4ID0gcHJvdGVpbnMsIHkgPSBjYWxvcmllc19idXJuZWQpKSArCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iKQoKZ2dwbG90KGxpZmVfY2xlYW4sIGFlcyh4ID0gZmF0cywgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIpCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSBjYWxvcmllcywgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIpCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSBzdWdhcl9nLCB5ID0gY2Fsb3JpZXNfYnVybmVkKSkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIikKCmdncGxvdChsaWZlX2NsZWFuLCBhZXMoeCA9IGRpZXRfdHlwZSwgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX2JveHBsb3QoKQoKZ2dwbG90KGxpZmVfY2xlYW4sIGFlcyh4ID0gZGlldF90eXBlLCB5ID0gY2Fsb3JpZXNfYnVybmVkKSkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIikKCmdncGxvdChsaWZlX2NsZWFuLCBhZXMoeCA9IGJ1cm5zX2NhbG9yaWVzX2JpbiwgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIpCmBgYAoKYGBge3J9Cm1lYW4obGlmZV9jbGVhbiRjYXJicykKbWVhbihsaWZlX2NsZWFuJHByb3RlaW5zKQptZWFuKGxpZmVfY2xlYW4kZmF0cykKbWVhbihsaWZlX2NsZWFuJGNhbG9yaWVzKQptZWFuKGxpZmVfY2xlYW4kc3VnYXJfZykKbWVhbihsaWZlX2NsZWFuJGNob2xlc3Rlcm9sX21nKQptZWFuKGxpZmVfY2xlYW4kY2FsX2JhbGFuY2UpCgpgYGAKCiMgRXhwbG9yZSBFeGVyY2lzZSBWYXJpYWJsZXMKVXNpbmcgY2Fsb3JpZXNfYnVybmVkIGZvciB0aGUgZGVwZW5kZW50IHZhcmlhYmxlCgpgYGB7cn0KZ2dwbG90KGxpZmVfY2xlYW4sIGFlcyh4ID0gbWF4X2JwbSwgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIpCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSByZXN0aW5nX2JwbSwgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIpCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSBzZXNzaW9uX2R1cmF0aW9uX2hvdXJzLCB5ID0gY2Fsb3JpZXNfYnVybmVkKSkgKwogIGdlb21fcG9pbnQo
KSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIikKCmdncGxvdChsaWZlX2NsZWFuLCBhZXMoeCA9IHdvcmtvdXRfdHlwZSwgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIpCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSB3b3Jrb3V0X2ZyZXF1ZW5jeV9kYXlzX3dlZWssIHkgPSBjYWxvcmllc19idXJuZWQpKSArCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iKQoKZ2dwbG90KGxpZmVfY2xlYW4sIGFlcyh4ID0gc2V0cywgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIpCgpnZ3Bsb3QobGlmZV9jbGVhbiwgYWVzKHggPSByZXBzLCB5ID0gY2Fsb3JpZXNfYnVybmVkKSkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIikKCmdncGxvdChsaWZlX2NsZWFuLCBhZXMoeCA9IHdvcmtvdXRfdHlwZSwgeSA9IGNhbG9yaWVzX2J1cm5lZCkpICsKICBnZW9tX2JveHBsb3QoKQoKYGBgCgpgYGB7cn0KbnVtX3ZhcnMgPC0gbGlmZV9jbGVhbiB8PiBzZWxlY3Qod2hlcmUoaXMubnVtZXJpYykpCgpjb3JyIDwtIGNvcihudW1fdmFycywgdXNlID0gImNvbXBsZXRlLm9icyIpCgpnZ2NvcnJwbG90OjpnZ2NvcnJwbG90KGNvcnIsIGxhYiA9IEZBTFNFLCBoYy5vcmRlciA9IFRSVUUsIHRsLmNleCA9IDYsIHRsLnNydCA9IDQ1KQoKYGBgCgpgYGB7cn0KCmBgYAoK